home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Tools & Utilities
/
Collection of Tools and Utilities.iso
/
edit
/
pt20pc.zip
/
RE2.ASM
< prev
next >
Wrap
Assembly Source File
|
1991-02-04
|
7KB
|
292 lines
;
; al = input character
; di = nlist tail
; si = clist tail
; dx = clist head
;
; Everything in this file -- code, the scanner's variables and both work
; lists -- is placed in this one segment.  While the scanner runs, ds and es
; are made equal to cs (see _re_scan), which is what the ASSUMEs describe.
RE_TEXT SEGMENT WORD PUBLIC 'CODE'
assume cs:RE_TEXT
assume ds:RE_TEXT
assume es:RE_TEXT
; Defined in another module.  myxchg loads these bytes into al as the
; characters to look for with REPNE SCASB when no partial match is pending;
; a zero in _reFast1 disables that fast scan.
extrn _reFast1:BYTE
extrn _reFast2:BYTE
;
; XTAG:re_scan
; re_scan( char * beginBuffer, char * endBuffer, char *segBuffer,
; char ** matchBegin, char ** matchEnd,
; int * numberOfNewlines );
;
; returns:
; 0 = NOT_FOUND -- RE was not found between beginBuffer
; and endBuffer
; 1 = FOUND -- RE was found between matchBegin and matchEnd
; 2 = PARTIAL_MATCH -- partial match at the end of the buffer
; starting at matchBegin
;
public _re_scan
;
; _re_scan -- far entry point.  Saves the caller's registers, points ds and
; es at this code segment (where all scanner variables live), records the
; buffer bounds and enters the matching loop at myxchg.  Every exit path
; (_re_found, endOfSpan) jumps back to "finish" with the return code in ax.
;
; Stack arguments as read below (near offsets, far return address):
;   [bp+6]   beginBuffer       offset of the first byte to scan
;   [bp+8]   endBuffer         offset of the last byte to scan (inclusive)
;   [bp+10]  segBuffer         segment of the buffer
;   [bp+12]  matchBegin        written by _re_found and endOfSpan
;   [bp+14]  matchEnd          written by _re_found
;   [bp+16]  numberOfNewlines  written by finish
;
_re_scan PROC FAR
        push    bp                      ; save bp
        mov     bp,sp                   ; bp addresses the arguments
;
        mov     cs:dataSegment,ds       ; remember the caller's data segment;
                                        ; the result pointers are relative to it
;
        push    ds                      ; preserve the caller's ds, es, si, di
        push    es
        push    si
        push    di
;
        push    cs                      ; ds = es = cs for the whole scan
        pop     ds
        push    cs
        pop     es
;
        mov     ax,[bp+6]               ; beginBuffer
        mov     cs:nextByteOffset,ax    ; read position
        mov     cs:firstByteOffset,ax   ; start of the newline count in finish
        mov     ax,[bp+8]               ; endBuffer
        mov     cs:lastByteOffset,ax
; the segment goes right after nextByteOffset so that both can be loaded
; together with a single LDS/LES
        mov     ax,[bp+10]              ; segBuffer
        mov     cs:nextByteSegment,ax
;
init:
        cld                             ; string instructions move forward
        lea     di,list1                ; initially list1=nlist and list2=clist
        mov     nlistTop,di             ; (they are swapped after each character)
        lea     si,list2
        mov     clistTop,si
        mov     dx,si                   ; dx = clist head
        mov     al,0AH                  ; "last char read" at beginning is a NL
        jmp     myxchg
;
;
finish:
; Count the newlines in the bytes from firstByteOffset up to (but not
; including) nextByteOffset and store the count through numberOfNewlines.
; On entry ax holds the return value of re_scan.
        push    ax                      ; al is needed for the scan
        mov     di,cs:firstByteOffset   ; es:di = first byte to examine
        mov     cx,cs:nextByteOffset
        mov     ds,cs:nextByteSegment
        mov     es,cs:nextByteSegment
        sub     cx,di                   ; cx = number of bytes to examine
        mov     al,0AH                  ; 0AH = newline, ASCII line feed
        xor     dx,dx                   ; dx = newline counter
NLLoop:
; REPNE SCASB with cx = 0 executes nothing and leaves the flags as they
; were, so a zero count has to be caught before the flags are tested.
; Otherwise an empty range would be reported as containing one newline.
        jcxz    noMoreNLs               ; nothing (left) to scan
        repne scasb                     ; scan to the next newline
        jne     noMoreNLs               ; ran off the end without finding one
        inc     dx                      ; found another one, bump the counter
        jmp     NLLoop                  ; look for more
noMoreNLs:
        mov     ds,cs:dataSegment       ; the result pointer is relative to
                                        ; the caller's data segment
        mov     bx,[bp+16]              ; address of the int for the count
        mov     [bx],dx                 ; store the computed newline count
; end of newline counting
;
        pop     ax                      ; restore ax -- the return value
        pop     di
        pop     si
        pop     es
        pop     ds
        pop     bp                      ; restore bp
        ret                             ; far return (PROC FAR)
_re_scan ENDP
;
public _re_cnode
;
; _re_cnode -- entered with a near CALL.  Appends a 4-byte entry to the
; clist (return address of the call, current thisMatchBegin), advances the
; clist tail in si, and resumes execution 3 bytes past the return address.
; The 3 bytes skipped are presumably an instruction placed right after the
; CALL by whoever generates the code in _re_code -- confirm there.
; Clobbers bx and cx.
;
_re_cnode:
        pop     bx                      ; bx = address following the CALL
        mov     cx,thisMatchBegin
        mov     [si],bx                 ; entry word 0: where to resume later
        mov     [si+2],cx               ; entry word 1: start of this match
        lea     si,[si+4]               ; advance the clist tail
        add     bx,3                    ; continue just past the skipped bytes
        jmp     bx                      ; register jump: the target is bx itself,
                                        ; not a word fetched from memory
;
public _re_nnode
extrn _addrJmpFound:WORD
;
; _re_nnode -- entered with a near CALL.  If the return address of that call
; equals the word stored in _addrJmpFound, the whole expression has matched
; and control goes to _re_found.  Otherwise a 4-byte entry (return address,
; current thisMatchBegin) is appended to the nlist, the nlist tail in di is
; advanced, and execution FALLS THROUGH into _re_clist to run the next
; pending clist entry.  Clobbers bx and cx.
;
_re_nnode:
        pop     bx                      ; bx = address following the CALL
        cmp     bx,_addrJmpFound        ; called from the "found" site?
        je      _re_found
        mov     cx,thisMatchBegin
        mov     [di],bx                 ; entry word 0: where to resume later
        mov     [di+2],cx               ; entry word 1: start of this match
        add     di,4                    ; advance the nlist tail
; no jump here on purpose: continue with the next clist item below
;
public _re_clist
;
; _re_clist -- take the next entry off the clist and resume it.
; dx = clist head, si = clist tail.  When the two meet the list is used up
; and control goes to myxchg, which swaps the lists and fetches the next
; input character.  Otherwise thisMatchBegin is reloaded from the entry,
; the head is advanced, and execution continues at the address saved in
; the entry.  Clobbers bx and cx.
;
_re_clist:
        cmp     dx,si
        je      myxchg                  ; head = tail => clist is empty
        mov     bx,dx                   ; bx -> entry being resumed
        mov     cx,[bx+2]               ; its saved match start
        add     dx,4                    ; head now points at the following entry
        mov     thisMatchBegin,cx
        jmp     WORD PTR [bx]           ; near indirect jump to the saved address
;
public _re_found
;
; _re_found -- the expression matched.  Stores the match bounds through the
; caller's matchBegin ([bp+12]) and matchEnd ([bp+14]) pointers, which are
; offsets in the caller's data segment, and leaves through finish with
; ax = 1 (FOUND).
;
_re_found:
        mov     cx,thisMatchBegin       ; read while ds still equals cs
        mov     ax,cs:nextByteOffset
        mov     ds,cs:dataSegment       ; switch to the caller's data segment
        dec     cx                      ; it was taken from nextByteOffset
                                        ; and so is one too big
        mov     bx,[bp+12]
        mov     [bx],cx                 ; *matchBegin = start of the match
        dec     ax                      ; the RE ends here
        mov     bx,[bp+14]
        mov     [bx],ax                 ; *matchEnd = end of the match
        mov     ax,1                    ; FOUND is returned in ax
        jmp     finish
;
;
;
; myxchg -- reached when the clist is exhausted for the current character.
; Optionally skips ahead in the buffer with SCASB, swaps the roles of the
; two lists, fetches the next input character into al and jumps to
; _re_code.  When the read position has passed lastByteOffset it goes to
; endOfSpan instead.
;
;   in:  al = last character read, di = nlist tail
;   out (at _re_code): al = current character, si = clist tail,
;        dx = clist head, di = nlist tail (empty list),
;        _re_sidechars = previous char (low byte) / following char (high byte)
;
myxchg:
        cmp     di,nlistTop             ; anything queued for the next char?
        jne     skipFastScan            ; yes: must go character by character
;
; if this char is 0 then do not try fast searches, if it is not 0 then the
; first character of the RE is a single fixed character
;
        cmp     _reFast1,0
        je      skipFastScan
;
; If there is no pending list of partial RE matches, then try to speed up
; the search by scanning for the first character of the RE.  The 8086 string
; search instructions are very fast for looking for a single character.
;
; get the parameters for the string scan
        push    ax                      ; save last char read (in al)
        mov     al,_reFast1
        les     di,DWORD PTR nextByteOffset     ; es:di = read position
        mov     cx,lastByteOffset
        cmp     cx,di
        jbe     noScan                  ; at or past the last byte: nothing to skip
        sub     cx,di
        inc     cx                      ; since lastByteOffset points to a valid byte
        mov     dx,cx                   ; save cx for the second scan
        repne scasb
        jne     notFound1               ; distinguish: not found - found at last char
        dec     di                      ; adjust since repne scasb goes one too far
notFound1:
        mov     cx,dx                   ; restore the count we saved
        mov     dx,di                   ; save the results of the first scasb in dx
        mov     al,_reFast2             ; get the second fast scan character
; MOV does not change the flags, so the character has to be tested
; explicitly.  Without this test the JNZ below acts on whatever the first
; scan (or the DEC DI) left behind, and a zero _reFast2 still triggers a
; scan for NUL bytes over the rest of the buffer.
        or      al,al
        jnz     doSecondScan            ; only look for chars not equal to '\0'
        mov     di,lastByteOffset       ; make sure this is not the lowest
        jmp     notFound2
doSecondScan:
        mov     di,WORD PTR nextByteOffset
        repne scasb
        jne     notFound2
        dec     di
notFound2:
; use the one that came first (the lower one)
        cmp     di,dx
        jbe     useSecond
        mov     di,dx
useSecond:
        mov     nextByteOffset,di       ; resume reading at the first candidate
noScan:
        push    cs
        pop     es                      ; restore es (NECESSARY since we use DI)
        mov     di,nlistTop             ; restore di
        pop     ax                      ; restore last char read (back into al)
;
;
skipFastScan:
        mov     si,di                   ; set new clist tail
        mov     di,clistTop             ; set new nlist tail
        mov     bx,nlistTop             ; set up to exchange clist and nlist
        mov     nlistTop,di             ; now do the reverse
        mov     clistTop,bx             ; reversing is faster than moving the lists
        mov     dx,bx                   ; start adding at the top of the clist
;
; get the next character
;
        lds     bx,DWORD PTR nextByteOffset     ; ds:bx = read position; ds is
                                        ; no longer cs until it is restored below
        cmp     bx,cs:lastByteOffset
        ja      endOfSpan               ; use unsigned comparison
        mov     BYTE PTR cs:_re_sidechars,al    ; save last char read
        mov     al,[bx+1]               ; get char after the next one
        mov     BYTE PTR cs:_re_sidechars+1,al  ; save next char to read
        mov     al,[bx]                 ; get the next character (finally)
        push    cs                      ; restore DS to equal CS
        pop     ds
        inc     bx
; NOTE(review): lastByteOffset is treated as a valid byte everywhere else
; (see the "ja endOfSpan" test above), yet this test already substitutes a
; NL when bx equals lastByteOffset.  That looks one byte early -- confirm
; against the caller's definition of endBuffer before changing it.
        cmp     bx,lastByteOffset       ; are we at the end of the buffer?
        jb      notAtEnd
        mov     ah,0AH                  ; if so, simulate a NL as the next char
        mov     BYTE PTR _re_sidechars+1,ah
notAtEnd:
        mov     nextByteOffset,bx       ; read position for the next round
        mov     thisMatchBegin,bx       ; a match starting at this character is
                                        ; recorded as its offset + 1
        jmp     _re_code
;
;
; endOfSpan -- the read position has moved past lastByteOffset.
; dx = clist head, si = clist tail; ds holds the buffer segment here (it was
; loaded by the LDS in the character fetch), so the list is read through cs.
; With an empty clist the result is 0 (NOT_FOUND).  Otherwise the lowest
; saved match start among the pending entries, minus one, is stored through
; matchBegin ([bp+12]) and the result is 2 (PARTIAL_MATCH).
;
endOfSpan:
;
        mov     es,cs:nextByteSegment   ; (finish loads es again before using it)
        cmp     dx,si
        je      noPending               ; head = tail: nothing is pending
        mov     bx,dx                   ; bx walks the entries
        mov     dx,cs:[bx+2]            ; dx = lowest match start seen so far
        jmp     short nextEntry
checkEntry:
        mov     ax,cs:[bx+2]            ; match start of this entry
        cmp     ax,dx
        jae     nextEntry               ; not lower (unsigned): keep dx
        mov     dx,ax
nextEntry:
        add     bx,4
        cmp     bx,si
        jne     checkEntry
;
        mov     ds,cs:dataSegment       ; restore ds
        dec     dx                      ; taken from nextByteOffset and so 1 too high
        mov     bx,[bp+12]
        mov     [bx],dx                 ; *matchBegin = start of the partial match
        mov     ax,2                    ; PARTIAL_MATCH
        jmp     finish
noPending:
        xor     ax,ax                   ; NOT_FOUND
        jmp     finish
;
even
public _re_code
; 1000 zero bytes reserved inside the code segment.  The scanner jumps here
; (jmp _re_code in the character fetch) once for every input character, so
; executable code must have been stored in this area by another module
; before _re_scan is called -- presumably the compiled form of the regular
; expression; confirm against the code that fills this buffer.
_re_code:
db 1000 DUP (0)
;
; Scanner variables.  They live in the code segment and are reached either
; through ds (which equals cs while the scanner runs) or with an explicit
; cs: override where ds points elsewhere.
even
; start addresses of the lists currently playing the nlist and clist roles
nlistTop dw 0
clistTop dw 0
; offset of the first scanned byte; start of the newline count in finish
firstByteOffset dw 0
; These next two words must stay in this order since they are picked up
; together with an LDS (or LES) instruction, which wants the offset and the
; segment in this order in two consecutive words
nextByteOffset dw 0
nextByteSegment dw 0
; the caller's ds, saved on entry to _re_scan; the result pointers passed by
; the caller are dereferenced relative to it
dataSegment dw 0
;
; Here we will keep the last character read (low byte) and the next
; character to be read (high byte)
;
public _re_sidechars
_re_sidechars dw 0
;
; offset of the last byte of the buffer (it is read as a valid byte)
lastByteOffset dw 0
; offset + 1 of the character at which the match being followed started
thisMatchBegin dw 0
;
public _re_wordtable
; 32 bytes = 256 bits.  Nothing in this file reads the table; it is exported
; for use elsewhere.  If it is indexed as a bitmap by character code (byte
; c/8, bit c mod 8, least significant bit first), the bits that are set are
; exactly '0'-'9', 'A'-'Z' and 'a'-'z' -- presumably the "word" characters;
; confirm against the code that uses it.
_re_wordtable db 0, 0, 0, 0, 0, 0, -1, 3
db -2, -1, -1, 7, -2, -1, -1, 7
db 0, 0, 0, 0, 0, 0, 0, 0
db 0, 0, 0, 0, 0, 0, 0, 0
;
; The two work lists.  Each entry is 4 bytes (a resume address followed by
; a match start, see _re_cnode and _re_nnode), so 400 bytes hold 100
; entries.  The lists take turns being the clist and the nlist; myxchg
; swaps the roles after every character.
; NOTE(review): neither _re_cnode nor _re_nnode checks the tail against the
; end of its list before storing an entry.
even
list1:
db 400 DUP (0)
;
even
list2:
db 400 DUP (0)
;
RE_TEXT ENDS
end